//-----------------------------------------------------------------------------
//
//Copyright(c) 2020, ThorsianWay Technologies Co, Ltd
//All rights reserved.
//
//IP Name       :   raster
//File Name     :   set_up.v
//Module name   :   set_up
//Full name     :   set up parameters for raster
//
//Author        :   zha daolu
//Email         :   
//Date          :   2020/6/1
//Version       :   V1.00
//
//Abstract      :   
//                  
//Called  by    :   GPU
//
//Modification history
//-----------------------------------------------------
//1.00: initial version
//
//-----------------------------------------------------------------------------

//-----------------------------
//DEFINE MACRO
//-----------------------------  
// set_up: triangle set-up stage of the raster IP.
//
// On bte_start the module registers the edge-function coefficients (a*, b*),
// the z/colour deltas and the tile origin.  One clock later (bte_start_ff1)
// it starts the 48x48 multipliers and the w/s/t float dividers.  From the
// multiplier results it forms the edge-function values at the tile origin,
// the doubled triangle area and a depth slope; block_start is the valid flag
// of the depth-slope divider delayed by one clock.
//
// The six d*_s2 / d*_t2 / d*_w2 outputs are driven directly by float_add
// instance output ports, so they are declared as nets (no "reg"), the same
// way z_slope is.  An instance output must connect to a net in Verilog-2001.
module set_up
(
    // Clock: every register in this module updates on its rising edge.
    input                       clk,
    // Asynchronous reset, active low.
    input                       rst_n,
    // Start strobe: latches coefficients, deltas, tile origin and w/s/t floats.
    input                       bte_start,
    // Tile origin; zero-extended to 48 bits before being multiplied by a*/b*.
    input  [31:0]               tile_x,
    input  [31:0]               tile_y,

    // ---- vertex 0 ----
    input  signed [47:0]        vertex0_x,
    input  signed [47:0]        vertex0_y,
    input  signed [47:0]        vertex0_z,
    // Only bit 47 (sign) and the low 32 bits of |w| are used by this module.
    input  signed [47:0]        vertex0_w,
    input  signed [31:0]        vertex0_s0,
    input  signed [31:0]        vertex0_t0,
    input  [7:0]                vertex0_primary_r,
    input  [7:0]                vertex0_primary_g,
    input  [7:0]                vertex0_primary_b,
    input  [7:0]                vertex0_primary_a,

    // ---- vertex 1 ----
    input  signed [47:0]        vertex1_x,
    input  signed [47:0]        vertex1_y,
    input  signed [47:0]        vertex1_z,
    input  signed [47:0]        vertex1_w,
    input  signed [31:0]        vertex1_s0,
    input  signed [31:0]        vertex1_t0,
    input  [7:0]                vertex1_primary_r,
    input  [7:0]                vertex1_primary_g,
    input  [7:0]                vertex1_primary_b,
    input  [7:0]                vertex1_primary_a,

    // ---- vertex 2 ----
    input  signed [47:0]        vertex2_x,
    input  signed [47:0]        vertex2_y,
    input  signed [47:0]        vertex2_z,
    input  signed [47:0]        vertex2_w,
    input  signed [31:0]        vertex2_s0,
    input  signed [31:0]        vertex2_t0,
    input  [7:0]                vertex2_primary_r,
    input  [7:0]                vertex2_primary_g,
    input  [7:0]                vertex2_primary_b,
    input  [7:0]                vertex2_primary_a,

    // Edge-function values at the tile origin:
    //   eeN_int = aN*tile_x + bN*tile_y + (cross product of vertex x/y terms)
    output reg signed [95:0]    ee0_int,
    output reg signed [95:0]    ee1_int,
    output reg signed [95:0]    ee2_int,
    // Edge-function coefficients:
    //   a0 = y0-y1, b0 = x1-x0;  a1 = y1-y2, b1 = x2-x1;  a2 = y2-y0, b2 = x0-x2
    output reg signed [47:0]    a0,
    output reg signed [47:0]    b0,
    output reg signed [47:0]    a1,
    output reg signed [47:0]    b1,
    output reg signed [47:0]    a2,
    output reg signed [47:0]    b2,
    // Valid flag of the depth-slope divider, delayed by one clock.
    output reg                  block_start,
    // tile_x / tile_y as captured on bte_start.
    output reg [31:0]           tile_x_out,
    output reg [31:0]           tile_y_out,

    // Attribute deltas relative to vertex 2: vertex0 - vertex2.
    output reg signed [47:0]    dz_02,
    output reg signed [9:0]     dprimary_r_02,
    output reg signed [9:0]     dprimary_g_02,
    output reg signed [9:0]     dprimary_b_02,
    output reg signed [9:0]     dprimary_a_02,
    // Attribute deltas relative to vertex 2: vertex1 - vertex2.
    output reg signed [47:0]    dz_12,
    output reg signed [9:0]     dprimary_r_12,
    output reg signed [9:0]     dprimary_g_12,
    output reg signed [9:0]     dprimary_b_12,
    output reg signed [9:0]     dprimary_a_12,

    // a2*b0 - a0*b2, stored as a 96-bit unsigned vector.
    output reg [95:0]           area,
    // Quotient of the depth-slope divider (slope float / area float).
    output [31:0]               z_slope,

    // Outputs of the float_add instances: (attr0 / w0) - (attr2 / w2) and
    // (attr1 / w1) - (attr2 / w2); dw* are the differences of the reciprocals.
    output signed [31:0]        ds0_s2,
    output signed [31:0]        dt0_t2,
    output signed [31:0]        dw0_w2,
    output signed [31:0]        ds1_s2,
    output signed [31:0]        dt1_t2,
    output signed [31:0]        dw1_w2,

    // Vertex-2 divider results captured on w_rcp_valid:
    // w2 = one_float / w2_float, s2 = s2_float / w2_float, t2 = t2_float / w2_float.
    output reg signed [31:0]    w2,
    output reg signed [31:0]    s2,
    output reg signed [31:0]    t2
    );

// Short aliases for the vertex position inputs.
wire signed [47:0] x0, y0, z0;
wire signed [47:0] x1, y1, z1;
wire signed [47:0] x2, y2, z2;

assign x0 = vertex0_x;
assign y0 = vertex0_y;
assign z0 = vertex0_z;
assign x1 = vertex1_x;
assign y1 = vertex1_y;
assign z1 = vertex1_z;
assign x2 = vertex2_x;
assign y2 = vertex2_y;
assign z2 = vertex2_z;

// 96-bit products delivered by the mult_signed_48x48_normal instances.
wire signed [95:0] x0y1, x1y0;          // vertex cross terms for edge 0
wire signed [95:0] x1y2, x2y1;          // vertex cross terms for edge 1
wire signed [95:0] x2y0, x0y2;          // vertex cross terms for edge 2
wire signed [95:0] a2b0, a0b2;          // terms of the doubled area
wire signed [95:0] a0x, b0y;            // coefficient * tile origin, edge 0
wire signed [95:0] a1x, b1y;            // coefficient * tile origin, edge 1
wire signed [95:0] a2x, b2y;            // coefficient * tile origin, edge 2
wire signed [95:0] a0z2, a1z0, a2z1;    // terms of dz/dx * area
wire signed [95:0] b0z2, b1z0, b2z1;    // terms of dz/dy * area

// Delayed copies of bte_start.  Only bte_start_ff1 is read elsewhere in this
// module; ff2..ff8 are shifted but otherwise unused.
reg bte_start_ff1, bte_start_ff2, bte_start_ff3, bte_start_ff4;
reg bte_start_ff5, bte_start_ff6, bte_start_ff7, bte_start_ff8;

// Valid strobe of the x0*y1 multiplier and its one-clock delayed copy.
wire mul_valid;
reg  mul_valid_ff1;

// Vertex cross products x_i * y_j, used as the constant term of each edge
// function.  All six multipliers are started by bte_start_ff1.  Only the
// first instance exports its valid flag (mul_valid), which gates every
// register that consumes a product.
// NOTE(review): this presumes all mult_signed_48x48_normal instances share
// the same latency - confirm against the multiplier implementation.
mult_signed_48x48_normal uut_mult_signed_x0y1 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(x0), .b(y1), .out(x0y1),
    .valid(mul_valid)
);

mult_signed_48x48_normal uut_mult_signed_x1y0 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(x1), .b(y0), .out(x1y0),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_x1y2 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(x1), .b(y2), .out(x1y2),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_x2y1 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(x2), .b(y1), .out(x2y1),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_x2y0 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(x2), .b(y0), .out(x2y0),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_x0y2 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(x0), .b(y2), .out(x0y2),
    .valid()
);

// Products of the registered edge coefficients.  a*/b* are written on
// bte_start, so they hold their new values in the cycle where bte_start_ff1
// starts these multipliers.

// Terms of the doubled triangle area (area = a2*b0 - a0*b2).
mult_signed_48x48_normal uut_mult_signed_a2b0 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(a2), .b(b0), .out(a2b0),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_a0b2 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(a0), .b(b2), .out(a0b2),
    .valid()
);

// Coefficient * tile origin.  tile_x / tile_y are zero-extended from 32 to
// 48 bits, so they always enter the signed multiplier as non-negative values.
mult_signed_48x48_normal uut_mult_signed_a0x (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(a0), .b({16'b0,tile_x}), .out(a0x),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_b0y (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(b0), .b({16'b0,tile_y}), .out(b0y),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_a1x (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(a1), .b({16'b0,tile_x}), .out(a1x),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_b1y (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(b1), .b({16'b0,tile_y}), .out(b1y),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_a2x (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(a2), .b({16'b0,tile_x}), .out(a2x),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_b2y (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(b2), .b({16'b0,tile_y}), .out(b2y),
    .valid()
);


// Coefficient * depth products.  Summed later they give
//   dz/dx * area = a0*z2 + a1*z0 + a2*z1
//   dz/dy * area = b0*z2 + b1*z0 + b2*z1
mult_signed_48x48_normal uut_mult_signed_a0z2 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(a0), .b(z2), .out(a0z2),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_a1z0 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(a1), .b(z0), .out(a1z0),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_a2z1 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(a2), .b(z1), .out(a2z1),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_b0z2 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(b0), .b(z2), .out(b0z2),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_b1z0 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(b1), .b(z0), .out(b1z0),
    .valid()
);

mult_signed_48x48_normal uut_mult_signed_b2z1 (
    .clk(clk), .rst_n(rst_n), .start(bte_start_ff1),
    .a(b2), .b(z1), .out(b2z1),
    .valid()
);


// Edge-function coefficients, captured when bte_start is high:
//   aN = difference of vertex y values, bN = difference of vertex x values.
// The operands are already signed 48-bit nets, so the subtraction is a plain
// 48-bit two's-complement difference (it wraps if the true result needs 49 bits).
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n) begin
        a0 <= {48{1'b0}};
        a1 <= {48{1'b0}};
        a2 <= {48{1'b0}};
        b0 <= {48{1'b0}};
        b1 <= {48{1'b0}};
        b2 <= {48{1'b0}};
    end
    else if (bte_start) begin
        a0 <= y0 - y1;
        a1 <= y1 - y2;
        a2 <= y2 - y0;
        b0 <= x1 - x0;
        b1 <= x2 - x1;
        b2 <= x0 - x2;
    end
end

// Differences for interpolation: depth and primary-colour deltas of vertex 0
// and vertex 1 relative to vertex 2, captured when bte_start is high.
//
// The colour inputs are unsigned 8-bit and the destinations are signed
// 10-bit.  The subtraction is sized by the 10-bit destination, so both
// operands are zero-extended first and the result is the correct
// two's-complement difference in the range -255..255.
always@(posedge clk or negedge rst_n)
begin
	if(~rst_n)
	begin
		// Reset literals sized to match the 10-bit colour delta registers.
		dz_02                        <= 48'b0;
		dprimary_r_02                <= 10'b0;
		dprimary_g_02                <= 10'b0;
		dprimary_b_02                <= 10'b0;
		dprimary_a_02                <= 10'b0;
		dz_12                        <= 48'b0;
		dprimary_r_12                <= 10'b0;
		dprimary_g_12                <= 10'b0;
		dprimary_b_12                <= 10'b0;
		dprimary_a_12                <= 10'b0;
	end
	else if(bte_start)
	begin
		dz_02                        <= vertex0_z - vertex2_z;
		dprimary_r_02                <= vertex0_primary_r - vertex2_primary_r;
		dprimary_g_02                <= vertex0_primary_g - vertex2_primary_g;
		dprimary_b_02                <= vertex0_primary_b - vertex2_primary_b;
		dprimary_a_02                <= vertex0_primary_a - vertex2_primary_a;
		dz_12                        <= vertex1_z - vertex2_z;
		dprimary_r_12                <= vertex1_primary_r - vertex2_primary_r;
		dprimary_g_12                <= vertex1_primary_g - vertex2_primary_g;
		dprimary_b_12                <= vertex1_primary_b - vertex2_primary_b;
		dprimary_a_12                <= vertex1_primary_a - vertex2_primary_a;
	end
end

// Capture the tile origin on bte_start so it leaves the module alongside the
// set-up results.
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n)
        {tile_x_out, tile_y_out} <= 64'b0;
    else if (bte_start)
        {tile_x_out, tile_y_out} <= {tile_x, tile_y};
end


// Edge-function values at the tile origin (tile_x, tile_y):
//   eeN = aN*tile_x + bN*tile_y + cN, with cN built from vertex cross products.
// Everything is 96-bit signed arithmetic; results are captured on mul_valid.
wire signed [95:0] ee0_const = x0y1 - x1y0;
wire signed [95:0] ee1_const = x1y2 - x2y1;
wire signed [95:0] ee2_const = x2y0 - x0y2;

always @(posedge clk or negedge rst_n)
begin
    if (!rst_n) begin
        ee0_int <= {96{1'b0}};
        ee1_int <= {96{1'b0}};
        ee2_int <= {96{1'b0}};
    end
    else if (mul_valid) begin
        ee0_int <= a0x + b0y + ee0_const;
        ee1_int <= a1x + b1y + ee1_const;
        ee2_int <= a2x + b2y + ee2_const;
    end
end

// Twice the triangle area: a2*b0 - a0*b2, captured on mul_valid.
// NOTE(review): area is an unsigned 96-bit register, so a negative
// difference is stored as its two's-complement bit pattern - confirm that
// callers only submit triangles for which this value is non-negative.
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n)
        area <= {96{1'b0}};
    else if (mul_valid)
        area <= a2b0 - a0b2;
end

// Depth slope for depth offset.
//   z(x,y) = (ee0(x,y)*z2 + ee1(x,y)*z0 + ee2(x,y)*z1) / area
//   dz/dx  = (a0*z2 + a1*z0 + a2*z1) / area
//   dz/dy  = (b0*z2 + b1*z0 + b2*z1) / area
// The numerators are formed here; the division by area happens later.
// All operands are signed 96-bit nets, so they are sign-extended to the
// 97-bit sum width before the addition.
wire [96:0] dzdx_x_area;
wire [96:0] dzdy_x_area;

assign dzdx_x_area = a0z2 + a1z0 + a2z1;
assign dzdy_x_area = b0z2 + b1z0 + b2z1;

// Magnitudes: negate when the sign bit [96] is set.
wire [96:0] dzdx_x_area_mag = dzdx_x_area[96] ? -dzdx_x_area : dzdx_x_area;
wire [96:0] dzdy_x_area_mag = dzdy_x_area[96] ? -dzdy_x_area : dzdy_x_area;

// Registered magnitudes (low 96 bits), captured on mul_valid.
reg [95:0] abs_dzdx_x_area;
reg [95:0] abs_dzdy_x_area;

always @(posedge clk or negedge rst_n)
begin
    if (!rst_n) begin
        abs_dzdx_x_area <= {96{1'b0}};
        abs_dzdy_x_area <= {96{1'b0}};
    end
    else if (mul_valid) begin
        abs_dzdx_x_area <= dzdx_x_area_mag[95:0];
        abs_dzdy_x_area <= dzdy_x_area_mag[95:0];
    end
end

// Leading-zero counts of the two gradient magnitudes and of the area.
// Data_en is mul_valid_ff1 because the three operands are registered on
// mul_valid and are therefore stable one clock later.
wire [7:0] abs_dzdx_x_area_lz;
wire [7:0] abs_dzdy_x_area_lz;
wire [7:0] area_lz;

// Valid flag of the leading-zero counters.  Declared explicitly so the design
// does not rely on an implicit net (and still builds under `default_nettype none).
wire       lz_valid;

clz_96 clz_abs_dzdx_x_area(
    .clk(clk),
    .rst_n(rst_n),
    .busy(1'b0),
    .Data_en(mul_valid_ff1),
    .Data_in(abs_dzdx_x_area),
    .LZB_out(abs_dzdx_x_area_lz),
    .LZB_valid(lz_valid)            // used as the valid flag for all three counters
);

clz_96 clz_abs_dzdy_x_area(
    .clk(clk),
    .rst_n(rst_n),
    .busy(1'b0),
    .Data_en(mul_valid_ff1),
    .Data_in(abs_dzdy_x_area),
    .LZB_out(abs_dzdy_x_area_lz),
    .LZB_valid()                    // unused: same enable as the instance above
);

clz_96 clz_area(
    .clk(clk),
    .rst_n(rst_n),
    .busy(1'b0),
    .Data_en(mul_valid_ff1),
    .Data_in(area),
    .LZB_out(area_lz),
    .LZB_valid()                    // unused: same enable as the first instance
);

// Cycle 2: exponent and mantissa fields used to build the float operands.
// Exponents are truncated to 8 bits; the constants are the folded forms of
// 127 + 31 + 16 (gradients) and 127 + 31 (area).
// Mantissas: shift the leading one out of the 96-bit value, then keep the
// top 23 bits.
wire [7:0]  abs_dzdx_x_area_exp;
wire [7:0]  abs_dzdy_x_area_exp;
wire [7:0]  area_exp;
wire [22:0] abs_dzdx_x_area_man;
wire [22:0] abs_dzdy_x_area_man;
wire [22:0] area_man;

assign abs_dzdx_x_area_exp = 8'd174 - abs_dzdx_x_area_lz;
assign abs_dzdy_x_area_exp = 8'd174 - abs_dzdy_x_area_lz;
assign area_exp            = 8'd158 - area_lz;

assign abs_dzdx_x_area_man = (abs_dzdx_x_area << (abs_dzdx_x_area_lz + 1)) >> 73;
assign abs_dzdy_x_area_man = (abs_dzdy_x_area << (abs_dzdy_x_area_lz + 1)) >> 73;
assign area_man            = (area            << (area_lz + 1))            >> 73;

// Float operands for the depth-slope division, captured on lz_valid.
//
// The depth-offset slope is the larger of |dz/dx| and |dz/dy|.  Both
// candidates are packed as non-negative floats {sign=0, exponent, mantissa}
// built with the same exponent offset, so comparing their 31-bit
// {exponent, mantissa} fields as unsigned integers orders them by magnitude.
// (Selecting on "more leading zeros" would pick the smaller gradient.)
wire [31:0] dzdx_x_area_float = {1'b0, abs_dzdx_x_area_exp, abs_dzdx_x_area_man};
wire [31:0] dzdy_x_area_float = {1'b0, abs_dzdy_x_area_exp, abs_dzdy_x_area_man};
wire        dzdx_is_larger    = (dzdx_x_area_float[30:0] > dzdy_x_area_float[30:0]);

reg [31:0] slope_x_area_float;   // max(|dz/dx|, |dz/dy|) * area, as a float
reg [31:0] area_float;           // area as a float
reg div_en;                      // enable for the depth-slope divider

always@(posedge clk or negedge rst_n)
begin
   if(~rst_n)
   begin
        slope_x_area_float  <= 32'b0;
        area_float <= 32'b0;
   end
   else if(lz_valid)
   begin
        slope_x_area_float  <= dzdx_is_larger ? dzdx_x_area_float : dzdy_x_area_float;
        area_float <= {1'b0,area_exp,area_man};
   end
end

// div_en is lz_valid delayed by one clock, so it lines up with the float
// operands that are registered on lz_valid.
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n)
        div_en <= 1'b0;
    else begin
        div_en <= 1'b0;          // default: no divide request
        if (lz_valid)
            div_en <= 1'b1;      // operands are captured in this same cycle
    end
end


// Valid flag of the depth-slope divider.  Declared explicitly so the design
// does not rely on an implicit net (and still builds under `default_nettype none).
wire div_valid;

// z_slope = slope_x_area_float / area_float.
// Both inputs are floats (type 2'b10); the quotient is requested as an
// unsigned value (type_q = 2'b00).
// NOTE(review): the instance name mentions "ee1" but the operands are the
// depth-slope numerator and the area; the name is kept as-is in case it is
// referenced hierarchically elsewhere.
gc_ifdiv u_ee1_div_area(
    .clk(clk),                     //input clock
    .rst_n(rst_n),                 //input reset, low active
    .busy(1'b0),                   //input busy
    .en(div_en),                   //input enable
    .type_a(2'b10),                //input a type,2'b10:float 2'b01:signed 2'b00:unsigned 
    .type_b(2'b10),                //input b type,2'b10:float 2'b01:signed 2'b00:unsigned 
    .a(slope_x_area_float),        //input a
    .b(area_float),                //input b
    .type_q(2'b0),                 //output q type,2'b10:float 2'b01:signed 2'b00:unsigned
    .valid(div_valid),             //output valid
    .quotient(z_slope)             //output data
);  

// block_start follows the divider's valid flag with one clock of delay.
always@(posedge clk or negedge rst_n)
begin
	if(~rst_n)
		block_start <= 1'b0;
	else
		block_start <= div_valid;
end



// Delay line for bte_start (eight stages) and a one-clock delay of mul_valid.
// bte_start_ff1 starts the multipliers and the w/s/t dividers;
// mul_valid_ff1 enables the 96-bit leading-zero counters.
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n) begin
        {bte_start_ff8, bte_start_ff7, bte_start_ff6, bte_start_ff5,
         bte_start_ff4, bte_start_ff3, bte_start_ff2, bte_start_ff1} <= 8'b0;
        mul_valid_ff1 <= 1'b0;
    end
    else begin
        // Shift by one stage: ffN takes ff(N-1), ff1 takes bte_start.
        {bte_start_ff8, bte_start_ff7, bte_start_ff6, bte_start_ff5,
         bte_start_ff4, bte_start_ff3, bte_start_ff2, bte_start_ff1} <=
            {bte_start_ff7, bte_start_ff6, bte_start_ff5, bte_start_ff4,
             bte_start_ff3, bte_start_ff2, bte_start_ff1, bte_start};
        mul_valid_ff1 <= mul_valid;
    end
end


// Perspective correction: compute 1/w, s/w, t/w and their differences.
// First step: sign/magnitude split of w, s and t for each vertex.
//
// NOTE(review): the w magnitude is evaluated at 48 bits and then truncated to
// 32 bits, while the sign is taken from bit 47 - this presumes |w| always
// fits in 32 bits; confirm against the producer of vertexN_w.
wire [31:0] w0_unsigned;
wire [31:0] w1_unsigned;
wire [31:0] w2_unsigned;
assign w0_unsigned = vertex0_w[47] ? -vertex0_w : vertex0_w;
assign w1_unsigned = vertex1_w[47] ? -vertex1_w : vertex1_w;
assign w2_unsigned = vertex2_w[47] ? -vertex2_w : vertex2_w;

wire [31:0] s0_unsigned;
wire [31:0] s1_unsigned;
wire [31:0] s2_unsigned;
assign s0_unsigned = vertex0_s0[31] ? -vertex0_s0 : vertex0_s0;
assign s1_unsigned = vertex1_s0[31] ? -vertex1_s0 : vertex1_s0;
assign s2_unsigned = vertex2_s0[31] ? -vertex2_s0 : vertex2_s0;

wire [31:0] t0_unsigned;
wire [31:0] t1_unsigned;
wire [31:0] t2_unsigned;
assign t0_unsigned = vertex0_t0[31] ? -vertex0_t0 : vertex0_t0;
assign t1_unsigned = vertex1_t0[31] ? -vertex1_t0 : vertex1_t0;
assign t2_unsigned = vertex2_t0[31] ? -vertex2_t0 : vertex2_t0;

// Leading-zero counts of the magnitudes (outputs of the clz instances).
wire [5:0] w0_lz, w1_lz, w2_lz;
wire [5:0] s0_lz, s1_lz, s2_lz;
wire [5:0] t0_lz, t1_lz, t2_lz;
 

// Leading-zero counters for the w / s / t magnitudes.  These clz instances
// have no clock or enable connections here.
clz u_clz_w0 ( .Data_in(w0_unsigned), .LZB_out(w0_lz) );
clz u_clz_w1 ( .Data_in(w1_unsigned), .LZB_out(w1_lz) );
clz u_clz_w2 ( .Data_in(w2_unsigned), .LZB_out(w2_lz) );

clz u_clz_s0 ( .Data_in(s0_unsigned), .LZB_out(s0_lz) );
clz u_clz_s1 ( .Data_in(s1_unsigned), .LZB_out(s1_lz) );
clz u_clz_s2 ( .Data_in(s2_unsigned), .LZB_out(s2_lz) );

clz u_clz_t0 ( .Data_in(t0_unsigned), .LZB_out(t0_lz) );
clz u_clz_t1 ( .Data_in(t1_unsigned), .LZB_out(t1_lz) );
clz u_clz_t2 ( .Data_in(t2_unsigned), .LZB_out(t2_lz) );

// Exponent and mantissa fields for the w / s / t floats.
// Exponents: 127 + 31 - lz for w, and the same plus 16 for s and t (folded
// into the constants below, truncated to 8 bits).
// Mantissas: shift the leading one out of the 32-bit magnitude, then keep the
// top 23 bits.
wire [7:0]  w0_exp, w1_exp, w2_exp;
wire [22:0] w0_man, w1_man, w2_man;
assign w0_exp = 8'd158 - w0_lz;
assign w1_exp = 8'd158 - w1_lz;
assign w2_exp = 8'd158 - w2_lz;
assign w0_man = (w0_unsigned << (w0_lz + 1)) >> 9;
assign w1_man = (w1_unsigned << (w1_lz + 1)) >> 9;
assign w2_man = (w2_unsigned << (w2_lz + 1)) >> 9;

wire [7:0]  s0_exp, s1_exp, s2_exp;
wire [22:0] s0_man, s1_man, s2_man;
assign s0_exp = 8'd174 - s0_lz;
assign s1_exp = 8'd174 - s1_lz;
assign s2_exp = 8'd174 - s2_lz;
assign s0_man = (s0_unsigned << (s0_lz + 1)) >> 9;
assign s1_man = (s1_unsigned << (s1_lz + 1)) >> 9;
assign s2_man = (s2_unsigned << (s2_lz + 1)) >> 9;

wire [7:0]  t0_exp, t1_exp, t2_exp;
wire [22:0] t0_man, t1_man, t2_man;
assign t0_exp = 8'd174 - t0_lz;
assign t1_exp = 8'd174 - t1_lz;
assign t2_exp = 8'd174 - t2_lz;
assign t0_man = (t0_unsigned << (t0_lz + 1)) >> 9;
assign t1_man = (t1_unsigned << (t1_lz + 1)) >> 9;
assign t2_man = (t2_unsigned << (t2_lz + 1)) >> 9;

// Packed {sign, exponent, mantissa} words for w, s and t of each vertex,
// captured when bte_start is high so they are stable when bte_start_ff1
// enables the dividers.
reg [31:0] w0_float, s0_float, t0_float;
reg [31:0] w1_float, s1_float, t1_float;
reg [31:0] w2_float, s2_float, t2_float;

always @(posedge clk or negedge rst_n)
begin
    if (!rst_n) begin
        {w0_float, s0_float, t0_float} <= 96'b0;
        {w1_float, s1_float, t1_float} <= 96'b0;
        {w2_float, s2_float, t2_float} <= 96'b0;
    end
    else if (bte_start) begin
        // vertex 0
        w0_float <= {vertex0_w[47],  w0_exp, w0_man};
        s0_float <= {vertex0_s0[31], s0_exp, s0_man};
        t0_float <= {vertex0_t0[31], t0_exp, t0_man};
        // vertex 1
        w1_float <= {vertex1_w[47],  w1_exp, w1_man};
        s1_float <= {vertex1_s0[31], s1_exp, s1_man};
        t1_float <= {vertex1_t0[31], t1_exp, t1_man};
        // vertex 2
        w2_float <= {vertex2_w[47],  w2_exp, w2_man};
        s2_float <= {vertex2_s0[31], s2_exp, s2_man};
        t2_float <= {vertex2_t0[31], t2_exp, t2_man};
    end
end

// Valid flag of the w0 reciprocal divider; used as the "done" strobe for all
// nine w/s/t dividers, which share the same enable.
wire w_rcp_valid;

// Dividend for the reciprocals: sign 0, exponent field 159, mantissa 0.
// NOTE(review): if gc_ifdiv uses IEEE-754 single layout this is 2^32; the
// original comment called it the "1.15.16 fix one" - confirm the scaling.
wire [31:0] one_float = 32'h4F80_0000;

// Divider results.
wire [31:0] w0_rcp, w1_rcp, w2_rcp;         // one_float / wN_float
wire [31:0] s0_d_w0, s1_d_w1, s2_d_w2;      // sN_float  / wN_float
wire [31:0] t0_d_w0, t1_d_w1, t2_d_w2;      // tN_float  / wN_float

// Operand type codes: 2'b10 float, 2'b01 signed, 2'b00 unsigned.
// All three reciprocal dividers are float / float -> float.
gc_ifdiv u_w0_rcp (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(one_float), .b(w0_float),
    .quotient(w0_rcp), .valid(w_rcp_valid)
);

gc_ifdiv u_w1_rcp (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(one_float), .b(w1_float),
    .quotient(w1_rcp), .valid()
);

gc_ifdiv u_w2_rcp (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(one_float), .b(w2_float),
    .quotient(w2_rcp), .valid()
);

// Texture coordinate / w dividers, one per attribute per vertex.
// Operand type codes: 2'b10 float, 2'b01 signed, 2'b00 unsigned.
// All six are float / float -> float and are enabled by bte_start_ff1; their
// valid outputs are left unconnected.

// ---- vertex 0 ----
gc_ifdiv u_s0_div_w0 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(s0_float), .b(w0_float),
    .quotient(s0_d_w0), .valid()
);

gc_ifdiv u_t0_div_w0 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(t0_float), .b(w0_float),
    .quotient(t0_d_w0), .valid()
);

// ---- vertex 1 ----
gc_ifdiv u_s1_div_w1 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(s1_float), .b(w1_float),
    .quotient(s1_d_w1), .valid()
);

gc_ifdiv u_t1_div_w1 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(t1_float), .b(w1_float),
    .quotient(t1_d_w1), .valid()
);

// ---- vertex 2 ----
gc_ifdiv u_s2_div_w2 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(s2_float), .b(w2_float),
    .quotient(s2_d_w2), .valid()
);

gc_ifdiv u_t2_div_w2 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .en(bte_start_ff1),
    .type_a(2'b10), .type_b(2'b10), .type_q(2'b10),
    .a(t2_float), .b(w2_float),
    .quotient(t2_d_w2), .valid()
);


// Capture the vertex-2 divider results (reciprocal of w, s/w, t/w) when the
// w0 reciprocal divider reports valid.
always @(posedge clk or negedge rst_n)
begin
    if (!rst_n)
        {w2, s2, t2} <= 96'b0;
    else if (w_rcp_valid)
        {w2, s2, t2} <= {w2_rcp, s2_d_w2, t2_d_w2};
end

// Perspective-corrected attribute deltas relative to vertex 2.
// Subtraction is done as an addition with the sign bit (bit 31) of the
// vertex-2 operand inverted.  All adders are enabled by w_rcp_valid and their
// valid outputs are left unconnected.
wire [31:0] neg_s2_d_w2 = {~s2_d_w2[31], s2_d_w2[30:0]};
wire [31:0] neg_t2_d_w2 = {~t2_d_w2[31], t2_d_w2[30:0]};
wire [31:0] neg_w2_rcp  = {~w2_rcp[31],  w2_rcp[30:0]};

// ---- vertex 0 minus vertex 2 ----
float_add u_s0_m_s2 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .add_en(w_rcp_valid),
    .a(s0_d_w0), .b(neg_s2_d_w2),
    .z(ds0_s2), .valid()
);

float_add u_t0_m_t2 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .add_en(w_rcp_valid),
    .a(t0_d_w0), .b(neg_t2_d_w2),
    .z(dt0_t2), .valid()
);

float_add u_w0_m_w2 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .add_en(w_rcp_valid),
    .a(w0_rcp), .b(neg_w2_rcp),
    .z(dw0_w2), .valid()
);

// ---- vertex 1 minus vertex 2 ----
float_add u_s1_m_s2 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .add_en(w_rcp_valid),
    .a(s1_d_w1), .b(neg_s2_d_w2),
    .z(ds1_s2), .valid()
);

float_add u_t1_m_t2 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .add_en(w_rcp_valid),
    .a(t1_d_w1), .b(neg_t2_d_w2),
    .z(dt1_t2), .valid()
);

float_add u_w1_m_w2 (
    .clk(clk), .rst_n(rst_n), .busy(1'b0), .add_en(w_rcp_valid),
    .a(w1_rcp), .b(neg_w2_rcp),
    .z(dw1_w2), .valid()
);
endmodule

